{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# pyquery"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 初始化"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 字符串初始化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<li class=\"item-0\">first item</li>\n",
      "         <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
      "         <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "         <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
      "         <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
      "     \n"
     ]
    }
   ],
   "source": [
    "html = '''\n",
    "<div>\n",
    "    <ul>\n",
    "         <li class=\"item-0\">first item</li>\n",
    "         <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
    "         <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
    "         <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
    "         <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
    "     </ul>\n",
    " </div>\n",
    "'''\n",
    "from pyquery import PyQuery as pq\n",
    "doc = pq(html)\n",
    "print(doc('li'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### URL初始化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<head><meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\"/><meta http-equiv=\"X-UA-Compatible\" content=\"IE=Edge\"/><meta content=\"always\" name=\"referrer\"/><link rel=\"stylesheet\" type=\"text/css\" href=\"http://s1.bdstatic.com/r/www/cache/bdorz/baidu.min.css\"/><title>ç¾åº¦ä¸ä¸ï¼ä½ å°±ç¥é</title></head> \n"
     ]
    }
   ],
   "source": [
    "from pyquery import PyQuery as pq\n",
    "doc = pq(url='http://www.baidu.com')\n",
    "print(doc('head'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 文件初始化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<li class=\"item-0\">first item</li>\n",
      "         <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
      "         <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "         <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
      "         <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
      "     \n"
     ]
    }
   ],
   "source": [
    "from pyquery import PyQuery as pq\n",
    "doc = pq(filename='demo.html')\n",
    "print(doc('li'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 基本CSS选择器"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<li class=\"item-0\">first item</li>\n",
      "         <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
      "         <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "         <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
      "         <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
      "     \n"
     ]
    }
   ],
   "source": [
    "html = '''\n",
    "<div id=\"container\">\n",
    "    <ul class=\"list\">\n",
    "         <li class=\"item-0\">first item</li>\n",
    "         <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
    "         <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
    "         <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
    "         <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
    "     </ul>\n",
    " </div>\n",
    "'''\n",
    "from pyquery import PyQuery as pq\n",
    "doc = pq(html)\n",
    "print(doc('#container .list li'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 查找元素"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 子元素"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pyquery.pyquery.PyQuery'>\n",
      "<ul class=\"list\">\n",
      "         <li class=\"item-0\">first item</li>\n",
      "         <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
      "         <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "         <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
      "         <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
      "     </ul>\n",
      " \n",
      "<class 'pyquery.pyquery.PyQuery'>\n",
      "<li class=\"item-0\">first item</li>\n",
      "         <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
      "         <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "         <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
      "         <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
      "     \n"
     ]
    }
   ],
   "source": [
    "html = '''\n",
    "<div id=\"container\">\n",
    "    <ul class=\"list\">\n",
    "         <li class=\"item-0\">first item</li>\n",
    "         <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
    "         <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
    "         <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
    "         <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
    "     </ul>\n",
    " </div>\n",
    "'''\n",
    "from pyquery import PyQuery as pq\n",
    "doc = pq(html)\n",
    "items = doc('.list')\n",
    "print(type(items))\n",
    "print(items)\n",
    "lis = items.find('li')\n",
    "print(type(lis))\n",
    "print(lis)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pyquery.pyquery.PyQuery'>\n",
      "<li class=\"item-0\">first item</li>\n",
      "         <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
      "         <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "         <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
      "         <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
      "     \n"
     ]
    }
   ],
   "source": [
    "lis = items.children()\n",
    "print(type(lis))\n",
    "print(lis)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "         <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
      "         \n"
     ]
    }
   ],
   "source": [
    "lis = items.children('.active')\n",
    "print(lis)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 父元素"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pyquery.pyquery.PyQuery'>\n",
      "<div id=\"container\">\n",
      "    <ul class=\"list\">\n",
      "         <li class=\"item-0\">first item</li>\n",
      "         <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
      "         <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "         <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
      "         <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
      "     </ul>\n",
      " </div>\n"
     ]
    }
   ],
   "source": [
    "html = '''\n",
    "<div id=\"container\">\n",
    "    <ul class=\"list\">\n",
    "         <li class=\"item-0\">first item</li>\n",
    "         <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
    "         <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
    "         <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
    "         <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
    "     </ul>\n",
    " </div>\n",
    "'''\n",
    "from pyquery import PyQuery as pq\n",
    "doc = pq(html)\n",
    "items = doc('.list')\n",
    "container = items.parent()\n",
    "print(type(container))\n",
    "print(container)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pyquery.pyquery.PyQuery'>\n",
      "<div class=\"wrap\">\n",
      "    <div id=\"container\">\n",
      "        <ul class=\"list\">\n",
      "             <li class=\"item-0\">first item</li>\n",
      "             <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
      "             <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "             <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
      "             <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
      "         </ul>\n",
      "     </div>\n",
      " </div><div id=\"container\">\n",
      "        <ul class=\"list\">\n",
      "             <li class=\"item-0\">first item</li>\n",
      "             <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
      "             <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "             <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
      "             <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
      "         </ul>\n",
      "     </div>\n",
      " \n"
     ]
    }
   ],
   "source": [
    "html = '''\n",
    "<div class=\"wrap\">\n",
    "    <div id=\"container\">\n",
    "        <ul class=\"list\">\n",
    "             <li class=\"item-0\">first item</li>\n",
    "             <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
    "             <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
    "             <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
    "             <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
    "         </ul>\n",
    "     </div>\n",
    " </div>\n",
    "'''\n",
    "from pyquery import PyQuery as pq\n",
    "doc = pq(html)\n",
    "items = doc('.list')\n",
    "parents = items.parents()\n",
    "print(type(parents))\n",
    "print(parents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<div class=\"wrap\">\n",
      "    <div id=\"container\">\n",
      "        <ul class=\"list\">\n",
      "             <li class=\"item-0\">first item</li>\n",
      "             <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
      "             <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "             <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
      "             <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
      "         </ul>\n",
      "     </div>\n",
      " </div>\n"
     ]
    }
   ],
   "source": [
    "parent = items.parents('.wrap')\n",
    "print(parent)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 兄弟元素"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
      "             <li class=\"item-0\">first item</li>\n",
      "             <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
      "             <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
      "         \n"
     ]
    }
   ],
   "source": [
    "html = '''\n",
    "<div class=\"wrap\">\n",
    "    <div id=\"container\">\n",
    "        <ul class=\"list\">\n",
    "             <li class=\"item-0\">first item</li>\n",
    "             <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
    "             <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
    "             <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
    "             <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
    "         </ul>\n",
    "     </div>\n",
    " </div>\n",
    "'''\n",
    "from pyquery import PyQuery as pq\n",
    "doc = pq(html)\n",
    "li = doc('.list .item-0.active')\n",
    "print(li.siblings())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
      "             \n"
     ]
    }
   ],
   "source": [
    "html = '''\n",
    "<div class=\"wrap\">\n",
    "    <div id=\"container\">\n",
    "        <ul class=\"list\">\n",
    "             <li class=\"item-0\">first item</li>\n",
    "             <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
    "             <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
    "             <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
    "             <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
    "         </ul>\n",
    "     </div>\n",
    " </div>\n",
    "'''\n",
    "from pyquery import PyQuery as pq\n",
    "doc = pq(html)\n",
    "li = doc('.list .item-0.active')\n",
    "print(li.siblings('.active'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 遍历"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 单个元素"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "             \n"
     ]
    }
   ],
   "source": [
    "html = '''\n",
    "<div class=\"wrap\">\n",
    "    <div id=\"container\">\n",
    "        <ul class=\"list\">\n",
    "             <li class=\"item-0\">first item</li>\n",
    "             <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
    "             <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
    "             <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
    "             <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
    "         </ul>\n",
    "     </div>\n",
    " </div>\n",
    "'''\n",
    "from pyquery import PyQuery as pq\n",
    "doc = pq(html)\n",
    "li = doc('.item-0.active')\n",
    "print(li)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'generator'>\n",
      "<li class=\"item-0\">first item</li>\n",
      "             \n",
      "<li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
      "             \n",
      "<li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "             \n",
      "<li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
      "             \n",
      "<li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
      "         \n"
     ]
    }
   ],
   "source": [
    "html = '''\n",
    "<div class=\"wrap\">\n",
    "    <div id=\"container\">\n",
    "        <ul class=\"list\">\n",
    "             <li class=\"item-0\">first item</li>\n",
    "             <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
    "             <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
    "             <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
    "             <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
    "         </ul>\n",
    "     </div>\n",
    " </div>\n",
    "'''\n",
    "from pyquery import PyQuery as pq\n",
    "doc = pq(html)\n",
    "lis = doc('li').items()\n",
    "print(type(lis))\n",
    "for li in lis:\n",
    "    print(li)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 获取信息"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 获取属性"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<a href=\"link3.html\"><span class=\"bold\">third item</span></a>\n",
      "link3.html\n",
      "link3.html\n"
     ]
    }
   ],
   "source": [
    "html = '''\n",
    "<div class=\"wrap\">\n",
    "    <div id=\"container\">\n",
    "        <ul class=\"list\">\n",
    "             <li class=\"item-0\">first item</li>\n",
    "             <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
    "             <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
    "             <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
    "             <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
    "         </ul>\n",
    "     </div>\n",
    " </div>\n",
    "'''\n",
    "from pyquery import PyQuery as pq\n",
    "doc = pq(html)\n",
    "a = doc('.item-0.active a')\n",
    "print(a)\n",
    "print(a.attr('href'))\n",
    "print(a.attr.href)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 获取文本"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<a href=\"link3.html\"><span class=\"bold\">third item</span></a>\n",
      "third item\n"
     ]
    }
   ],
   "source": [
    "html = '''\n",
    "<div class=\"wrap\">\n",
    "    <div id=\"container\">\n",
    "        <ul class=\"list\">\n",
    "             <li class=\"item-0\">first item</li>\n",
    "             <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
    "             <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
    "             <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
    "             <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
    "         </ul>\n",
    "     </div>\n",
    " </div>\n",
    "'''\n",
    "from pyquery import PyQuery as pq\n",
    "doc = pq(html)\n",
    "a = doc('.item-0.active a')\n",
    "print(a)\n",
    "print(a.text())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 获取HTML"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "             \n",
      "<a href=\"link3.html\"><span class=\"bold\">third item</span></a>\n"
     ]
    }
   ],
   "source": [
    "html = '''\n",
    "<div class=\"wrap\">\n",
    "    <div id=\"container\">\n",
    "        <ul class=\"list\">\n",
    "             <li class=\"item-0\">first item</li>\n",
    "             <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
    "             <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
    "             <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
    "             <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
    "         </ul>\n",
    "     </div>\n",
    " </div>\n",
    "'''\n",
    "from pyquery import PyQuery as pq\n",
    "doc = pq(html)\n",
    "li = doc('.item-0.active')\n",
    "print(li)\n",
    "print(li.html())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## DOM操作"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### addClass、removeClass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "             \n",
      "<li class=\"item-0\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "             \n",
      "<li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "             \n"
     ]
    }
   ],
   "source": [
    "html = '''\n",
    "<div class=\"wrap\">\n",
    "    <div id=\"container\">\n",
    "        <ul class=\"list\">\n",
    "             <li class=\"item-0\">first item</li>\n",
    "             <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
    "             <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
    "             <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
    "             <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
    "         </ul>\n",
    "     </div>\n",
    " </div>\n",
    "'''\n",
    "from pyquery import PyQuery as pq\n",
    "doc = pq(html)\n",
    "li = doc('.item-0.active')\n",
    "print(li)\n",
    "li.removeClass('active')\n",
    "print(li)\n",
    "li.addClass('active')\n",
    "print(li)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### attr、css"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "             \n",
      "<li class=\"item-0 active\" name=\"link\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "             \n",
      "<li class=\"item-0 active\" name=\"link\" style=\"font-size: 14px\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
      "             \n"
     ]
    }
   ],
   "source": [
    "html = '''\n",
    "<div class=\"wrap\">\n",
    "    <div id=\"container\">\n",
    "        <ul class=\"list\">\n",
    "             <li class=\"item-0\">first item</li>\n",
    "             <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
    "             <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
    "             <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
    "             <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
    "         </ul>\n",
    "     </div>\n",
    " </div>\n",
    "'''\n",
    "from pyquery import PyQuery as pq\n",
    "doc = pq(html)\n",
    "li = doc('.item-0.active')\n",
    "print(li)\n",
    "li.attr('name', 'link')\n",
    "print(li)\n",
    "li.css('font-size', '14px')\n",
    "print(li)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### remove"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Hello, World This is a paragraph.\n",
      "Hello, World\n"
     ]
    }
   ],
   "source": [
    "html = '''\n",
    "<div class=\"wrap\">\n",
    "    Hello, World\n",
    "    <p>This is a paragraph.</p>\n",
    " </div>\n",
    "'''\n",
    "from pyquery import PyQuery as pq\n",
    "doc = pq(html)\n",
    "wrap = doc('.wrap')\n",
    "print(wrap.text())\n",
    "wrap.find('p').remove()\n",
    "print(wrap.text())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 其他DOM方法"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "http://pyquery.readthedocs.io/en/latest/api.html"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 伪类选择器"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<li class=\"item-0\">first item</li>\n",
      "             \n",
      "<li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
      "         \n",
      "<li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
      "             \n",
      "<li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
      "             <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
      "         \n",
      "<li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
      "             <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
      "             \n",
      "<li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
      "             \n"
     ]
    }
   ],
   "source": [
    "html = '''\n",
    "<div class=\"wrap\">\n",
    "    <div id=\"container\">\n",
    "        <ul class=\"list\">\n",
    "             <li class=\"item-0\">first item</li>\n",
    "             <li class=\"item-1\"><a href=\"link2.html\">second item</a></li>\n",
    "             <li class=\"item-0 active\"><a href=\"link3.html\"><span class=\"bold\">third item</span></a></li>\n",
    "             <li class=\"item-1 active\"><a href=\"link4.html\">fourth item</a></li>\n",
    "             <li class=\"item-0\"><a href=\"link5.html\">fifth item</a></li>\n",
    "         </ul>\n",
    "     </div>\n",
    " </div>\n",
    "'''\n",
    "from pyquery import PyQuery as pq\n",
    "doc = pq(html)\n",
    "li = doc('li:first-child')\n",
    "print(li)\n",
    "li = doc('li:last-child')\n",
    "print(li)\n",
    "li = doc('li:nth-child(2)')\n",
    "print(li)\n",
    "li = doc('li:gt(2)')\n",
    "print(li)\n",
    "li = doc('li:nth-child(2n)')\n",
    "print(li)\n",
    "li = doc('li:contains(second)')\n",
    "print(li)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "更多CSS选择器可以查看\n",
    "http://www.w3school.com.cn/css/index.asp"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 官方文档"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "http://pyquery.readthedocs.io/"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
